clear all

* ---------------------------------------------------------------------------
* Step 1: start from the firm-year estimation sample and attach the
* firm-year-product records to it.
* ---------------------------------------------------------------------------
use "$working/BS/local1/inputs/ProdR_new_kl_sampleSS_allobs", clear
* Observation count recorded by the original author: 39,673

* 1:m merge: each (co_code1, year) in memory may match several rows in the
* using file. Only matched rows are kept, and no _merge variable is left
* behind.
merge 1:m co_code1 year using "$working/prowess_wits_estimation_ronly_kl.dta", ///
	keep(match) nogenerate


* Build multi_prod: a firm-level dummy equal to 1 when the firm is
* multi-product in at least one year.
*   fobs  = number of rows of the firm with non-missing year
*   fobsy = number of distinct (co_code1, year) pairs of the firm
* A firm with more rows than distinct years has some year with several rows.
egen fobs  = count(year), by(co_code1)
egen tag   = tag(co_code1 year)
egen fobsy = total(tag), by(co_code1)
gen multi_prod = (fobs > fobsy)

* Firms that have one row per year but report more than one distinct
* product_name5 over time (single-product firms that switch product) are
* also flagged as multi-product.
egen sw = nvals(product_name5) if multi_prod == 0, by(co_code1)
replace multi_prod = 1 if sw > 1 & sw < .
**

* Reduce to one row per firm-year carrying the flag, so that the estimation
* sample can be recreated by merging on (co_code1, year).
collapse (mean) multi_prod, by(co_code1 year)

save "$working/estimation_sample.dta", replace



* ---------------------------------------------------------------------------
* Step 2: reload the product-level data and restrict it to the firm-years
* saved in estimation_sample.dta (this also brings in multi_prod).
* ---------------------------------------------------------------------------
use "$working/prowess_wits_estimation_ronly_kl.dta", clear
merge m:1 co_code1 year using "$working/estimation_sample.dta", ///
	keep(match) nogenerate

* Second merge: _merge is deliberately kept. The "_decomposition" counts
* computed below are restricted to _merge == 3.
* NOTE(review): no keep() is applied here, so any using-only rows
* (_merge == 2) stay in memory and can enter the "_full" counts -- confirm
* that this is intended.
merge m:1 co_code1 year using "$working/BS/local1/inputs/ProdR_new_kl_sampleSS"


* Overwrite nmbr_prods with the number of rows in each (co_code1, prod_date3) cell.
bysort co_code1 prod_date3: replace nmbr_prods = _N

* Distinct-value counts within each 2-digit industry. The "_full" version
* uses every row in memory; the "_decomposition" version uses only rows with
* _merge == 3 and is missing on all other rows.

* Distinct product names
egen uniqueproducts_full          = nvals(product_name5),                by(nic_08_2dig)
egen uniqueproducts_decomposition = nvals(product_name5) if _merge == 3, by(nic_08_2dig)

* Distinct 4-digit industry codes
egen nic4_full          = nvals(nic_08_4dig),                by(nic_08_2dig)
egen nic4_decomposition = nvals(nic_08_4dig) if _merge == 3, by(nic_08_2dig)

* Firm-year counts
* NOTE(review): nvals() is called with two variables here; confirm that the
* installed egenmore version accepts a varlist and counts distinct
* (co_code1, year) pairs.
egen uniquefirms_full          = nvals(co_code1 year),                by(nic_08_2dig)
egen uniquefirms_decomposition = nvals(co_code1 year) if _merge == 3, by(nic_08_2dig)

* Firm-year counts restricted to single-product firms (multi_prod == 0)
egen uniquespfs_full          = nvals(co_code1 year) if multi_prod==0,               by(nic_08_2dig)
egen uniquespfs_decomposition = nvals(co_code1 year) if multi_prod==0 & _merge == 3, by(nic_08_2dig)

* The conditional counts are missing on rows excluded by their -if-.
* Averaging within industry-year and then within industry copies each
* industry's value onto every row of that industry (suffix 2).
local count_vars uniqueproducts_full uniqueproducts_decomposition ///
	uniquefirms_full uniquefirms_decomposition ///
	uniquespfs_full uniquespfs_decomposition ///
	nic4_full nic4_decomposition
foreach stat of local count_vars {
	egen `stat'1 = mean(`stat'),  by(nic_08_2dig year)
	egen `stat'2 = mean(`stat'1), by(nic_08_2dig)
}

* Collapse to the distinct combinations of the table columns. The raw
* nic4_decomposition (not the "2" version) is used, so rows outside
* _merge == 3 form separate combinations with a missing value; those are
* removed right after.
contract nic_08_2dig ///
	uniqueproducts_full2 uniqueproducts_decomposition2 ///
	uniquefirms_full2 uniquefirms_decomposition2 ///
	uniquespfs_full2 uniquespfs_decomposition2 ///
	nic4_full nic4_decomposition
drop _freq
keep if !missing(nic4_decomposition)
	* Append one empty row and fill it with the column totals.
	* After -contract- the six "unique*" columns exist only with the "2"
	* suffix. They are named explicitly here so the loop does not depend on
	* variable abbreviation (it would fail under -set varabbrev off-).
	local table_cols uniqueproducts_full2 uniqueproducts_decomposition2 ///
		uniquefirms_full2 uniquefirms_decomposition2 ///
		uniquespfs_full2 uniquespfs_decomposition2 ///
		nic4_full nic4_decomposition
	count
	local k = r(N) + 1
	set obs `k'
	foreach v of local table_cols {
		* A tempvar holds the column total: "sum_" plus the full variable
		* name would exceed Stata's 32-character limit on variable names.
		tempvar col_total
		* total() ignores the missing value in the newly appended row.
		egen `col_total' = total(`v')
		* The appended row is identified by its empty industry code.
		replace `v' = `col_total' if nic_08_2dig == ""
		drop `col_total'
	}

* Name the row whose industry code is empty (the appended totals row).
replace nic_08_2dig = "Total" if nic_08_2dig == ""

* Variable labels; -texsave ..., varlab- is later called on these columns.
* Variables are referenced by their full names (the "unique*" columns carry
* a "2" suffix at this point) so that labelling does not rely on variable
* abbreviation.
label variable nic_08_2dig "Industry"
label variable uniqueproducts_full2 "Products"
label variable uniqueproducts_decomposition2 "Decomposition"
label variable uniquefirms_full2 "Firms"
label variable uniquefirms_decomposition2 "Decomposition Sample"
label variable uniquespfs_full2 "Single-product firms"
label variable uniquespfs_decomposition2 "SPF Decomposition"
label variable nic4_full "4-digit NICs"
label variable nic4_decomposition "4-digit decomposition"

* Sort on the raw 2-digit code first; "Total" sorts after the numeric codes.
* Then replace each code with "<code> <industry name>" for display. The
* code prefix is kept, so the row order is unchanged by the relabelling.
sort nic_08_2dig
	replace nic_08_2dig = "10 Food" if nic_08_2dig == "10"
	replace nic_08_2dig = "13 Textiles" if nic_08_2dig== "13"
	replace nic_08_2dig = "17 Paper" if nic_08_2dig == "17"
	replace nic_08_2dig = "20 Chemicals" if nic_08_2dig == "20"
	replace nic_08_2dig = "21 Pharmaceuticals" if nic_08_2dig == "21"
	replace nic_08_2dig = "22 Rubber and Plastic" if nic_08_2dig == "22"
	replace nic_08_2dig = "23 Non-metallic minerals" if nic_08_2dig == "23"
	replace nic_08_2dig = "24 Basic Metals" if nic_08_2dig == "24"
	replace nic_08_2dig = "25 Fabricated Metals" if nic_08_2dig == "25"
	replace nic_08_2dig = "26 Computer and Electronics" if nic_08_2dig == "26"
	replace nic_08_2dig = "27 Electrical Equipment" if nic_08_2dig == "27"
	replace nic_08_2dig = "28 Machinery" if nic_08_2dig == "28"
	replace nic_08_2dig = "29 Motor Vehicles" if nic_08_2dig == "29"
* Write the summary-statistics table to LaTeX.
* The last column is exported under the short name "unq".
rename uniqueproducts_decomposition2 unq

* One column list, shared by -order- and -texsave-, fixes the column order:
* firm-years, single-product firm-years, 4-digit codes, products.
local table_cols nic_08_2dig ///
	uniquefirms_full2 uniquefirms_decomposition2 ///
	uniquespfs_full2 uniquespfs_decomposition2 ///
	nic4_full nic4_decomposition ///
	uniqueproducts_full2 unq
order `table_cols'

* hlines(13) requests a horizontal rule at row 13; the relabelling step
* names 13 industries, with the "Total" row following them.
texsave `table_cols' using "$tables/sumstat.tex", ///
	title("Prowess Sample Summary Statistics (1989 - 2018)") ///
	footnote("Notes: Columns (1) and (2) show the total number of firm-year observations for the full and decomposition samples, respectively. Columns (3) and (4) report the number of all firm-year observations for single-product firms. Columns (5) and (6) report the number of 4-digit industry codes. The last two columns report the number of products --- defined by unique product names --- in each sector.") ///
	marker(tab:sumstat) location(h) ///
	autonumber hlines(13) ///
	varlab nofix frag replace
